These scripts were created by J.E. Panzik for SEACAR.
All scripts and outputs can be found on the SEACAR GitHub repository:
https://github.com/FloridaSEACAR/SEACAR_Panzik
Note: The top 2% of data is excluded when computing mean and standard deviations in plotting sections solely for the purpose of getting y-axis scales. The exclusion of the top 2% is not used in any statistics that are exported.
Loads libraries used in the script. The inclusion of scipen option limits how frequently R defaults to scientific notation.
library(knitr)
library(data.table)
library(dplyr)
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.1.2
library(ggplot2)
library(ggpubr)
library(scales)
library(EnvStats)
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.1.2
library(kableExtra)
windowsFonts(`Segoe UI` = windowsFont('Segoe UI'))
options(scipen=999)
opts_chunk$set(warning=FALSE, message=FALSE, dpi=200)
Imports file that is determined in the WC_Continuous_parameter_ReportCompile.R script.
The command fread is used because of its improved speed while handling large data files. Only columns that are used by the script are imported from the file, and are designated in the select input.
The script then gets the name of the parameter as it appears in the data file and units of the parameter.
data <- fread(file_in, sep="|", header=TRUE, stringsAsFactors=FALSE,
select=c("ManagedAreaName", "ProgramID", "ProgramName",
"ProgramLocationID", "SampleDate", "Year", "Month",
"RelativeDepth", "ActivityType", "ParameterName",
"ResultValue", "ParameterUnits", "ValueQualifier",
"SEACAR_QAQCFlagCode", "Include"),
na.strings="")
parameter <- unique(data$ParameterName)
unit <- unique(data$ParameterUnits)
Most data filtering is performed on export from the database, and is indicated by the Include variable. Include values of 1 indicate the data should be used for analysis, values of 0 indicate the data should not be used for analysis. Documentation on the database filtering is provided here: SEACAR Documentation- Analysis Filters and Calculations.docx
The filtering that is performed by the script at this point removes rows that are missing values for ResultValue and RelativeDepth, and removes any activity type that has “Blank” in the description. Data passes the filtering the process if it is has an Include value of 1.
Creates a variable for each MonitoringID which is defined as a unique combination of ManagedAreaName, ProgramID, ProgramAreaName, and ProgramLocationID.
After the initial filtering, a second filter variable is created to determine whether enough time is represented in the monitoring location, which is that each monitoring location has 5 year or more of unique year entries and have at least 2 consecutive years of observations with at least 2 repeating months for observations that pass the initial filter. If data passes the first set of filtering criteria and the time criteria, they are used in the analysis.
The function that determines whether a monitoring location has at least 2 consecutive years of observations with at least 2 repeating months takes the data, creates a list of the monitoring IDs and cycles through each monitoring ID. For each monitoring ID cycle:
A data frame is created that stores summary information for each monitoring location. This information is stored and combined with the results of the Seasonal Kendall Tau analysis and export to a data file once combined.
The sufficient data qualifier is merged with the original data, and a variable Use_In_Analysis is created to indicate what data should be used.
A variable with the monitoring IDs that pass all criteria is created and stored.
data$Include <- as.logical(data$Include)
data <- data[data$Include==TRUE,]
data <- data[!is.na(data$ResultValue),]
data <- data[!is.na(data$RelativeDepth),]
data <- data[!grep("Blank", data$ActivityType),]
if(param_name=="Water_Temperature"){
data <- data[data$ResultValue>=-5,]
} else{
data <- data[data$ResultValue>=0,]
}
MA_All_Region <- MA_All[MA_All$Region==region,]
data <- merge.data.frame(MA_All_Region[,c("AreaID", "ManagedAreaName")],
data, by="ManagedAreaName", all=TRUE)
data <- data %>%
group_by(AreaID, ManagedAreaName, ProgramID, ProgramName,
ProgramLocationID) %>%
mutate(MonitoringID=cur_group_id())
ContinuousConsecutiveCheck <- function(con_data){
IDs <- unique(con_data$MonitoringID[con_data$Include==TRUE &
!is.na(con_data$Include)])
for(i in 1:length(IDs)) {
Years <- unique(con_data$Year[con_data$MonitoringID==IDs[i] &
con_data$Include==TRUE &
!is.na(con_data$Include)])
Years <- Years[order(Years)]
if(length(Years)<2) {
next
}
for(j in 2:length(Years)) {
if(Years[j]-Years[j-1]!=1) {
next
}
Months1 <- unique(con_data$Month[con_data$MonitoringID==IDs[i] &
con_data$Year==Years[j-1] &
con_data$Include==TRUE &
!is.na(con_data$Include)])
Months2 <- unique(con_data$Month[con_data$MonitoringID==IDs[i] &
con_data$Year==Years[j] &
con_data$Include==TRUE &
!is.na(con_data$Include)])
if(length(intersect(Months1, Months2))>=2) {
if(exists("consecutive")==FALSE){
consecutive <- IDs[i]
break
} else{
consecutive <- append(consecutive, IDs[i])
break
}
}
}
}
return(consecutive)
}
consMonthIDs <- ContinuousConsecutiveCheck(data)
Mon_Summ <- data %>%
group_by(MonitoringID, AreaID, ManagedAreaName, ProgramID, ProgramName,
ProgramLocationID) %>%
summarize(ParameterName=parameter,
RelativeDepth=unique(RelativeDepth),
N_Data=length(ResultValue[Include==TRUE & !is.na(ResultValue)]),
N_Years=length(unique(Year[Include==TRUE & !is.na(Year)])),
EarliestYear=min(Year[Include==TRUE]),
LatestYear=max(Year[Include==TRUE]),
LastSampleDate=max(SampleDate[Include==TRUE]),
ConsecutiveMonths=ifelse(unique(MonitoringID) %in%
consMonthIDs==TRUE, TRUE, FALSE),
SufficientData=ifelse(N_Data>0 & N_Years>=suff_years &
ConsecutiveMonths==TRUE, TRUE, FALSE),
Median=median(ResultValue, na.rm=TRUE))
Mon_Summ$ConsecutiveMonths <- NULL
Mon_Summ <- as.data.table(Mon_Summ[order(Mon_Summ$MonitoringID), ])
data <- data %>%
group_by(MonitoringID) %>%
mutate(YearFromStart=Year-min(Year))
data <- merge.data.frame(data, Mon_Summ[,c("MonitoringID", "SufficientData")],
by="MonitoringID")
data$Use_In_Analysis <- ifelse(data$Include==TRUE & data$SufficientData==TRUE,
TRUE, FALSE)
Mon_IDs <- unique(data$MonitoringID[data$Use_In_Analysis==TRUE])
Mon_IDs <- Mon_IDs[order(Mon_IDs)]
n <- length(Mon_IDs)
Gets summary statistics for each monitoring location. Excluded monitoring locations are not included into whether the data should be used or not. Uses piping from dplyr package to feed into subsequent steps. The following steps are performed:
data variable and only include rows that have a Use_In_Analysis value of TRUEManagedAreaName, ProgramID, ProgramName, ProgramLocationID, Year, and Month.
Year.Month.ManagedAreaName, ProgramID, ProgramName, ProgramLocationID, Year, and Month in that order.Because the continuous data is extensive and most measurements are taken every 15 minutes, a daily average is determined and used based on grouping ManagedAreaName, ProgramID, ProgramName, ProgramLocationID, and SampleDate. The new ResultValue is the mean of all values on that date from that specific monitoring location. Sets the SampleDate as a date object, and creates various scales of the date to be used by plotting functions.
Mon_YM_Stats <- data[data$Use_In_Analysis==TRUE, ] %>%
group_by(MonitoringID, AreaID, ManagedAreaName, ProgramID, ProgramName,
ProgramLocationID, Year, Month) %>%
summarize(ParameterName=parameter,
RelativeDepth=unique(RelativeDepth),
EarliestSampleDate=min(SampleDate),
LastSampleDate=max(SampleDate), N=length(ResultValue),
Min=min(ResultValue), Max=max(ResultValue),
Median=median(ResultValue), Mean=mean(ResultValue),
StandardDeviation=sd(ResultValue))
Mon_YM_Stats <- as.data.table(Mon_YM_Stats[order(Mon_YM_Stats$ManagedAreaName,
Mon_YM_Stats$ProgramID,
Mon_YM_Stats$ProgramName,
Mon_YM_Stats$ProgramLocationID,
Mon_YM_Stats$Year,
Mon_YM_Stats$Month), ])
fwrite(select(Mon_YM_Stats, -MonitoringID),
paste0(out_dir,"/", param_name, "_", region,
"_MonitoringLoc_YearMonth_Stats.txt"), sep="|")
Mon_YM_Stats <- Mon_YM_Stats %>%
group_by(MonitoringID) %>%
mutate(YearFromStart=Year-min(Year))
Mon_YM_Stats$YearMonthDec <- Mon_YM_Stats$Year + ((Mon_YM_Stats$Month-0.5) / 12)
Mon_Y_Stats <- data[data$Use_In_Analysis==TRUE, ] %>%
group_by(AreaID, ManagedAreaName, ProgramID, ProgramName, ProgramLocationID,
Year) %>%
summarize(ParameterName=parameter,
RelativeDepth=unique(RelativeDepth),
EarliestSampleDate=min(SampleDate),
LastSampleDate=max(SampleDate), N=length(ResultValue),
Min=min(ResultValue), Max=max(ResultValue),
Median=median(ResultValue), Mean=mean(ResultValue),
StandardDeviation=sd(ResultValue))
Mon_Y_Stats <- as.data.table(Mon_Y_Stats[order(Mon_Y_Stats$ManagedAreaName,
Mon_Y_Stats$ProgramID,
Mon_Y_Stats$ProgramName,
Mon_Y_Stats$ProgramLocationID,
Mon_Y_Stats$Year), ])
fwrite(Mon_Y_Stats, paste0(out_dir,"/", param_name, "_", region,
"_MonitoringLoc_Year_Stats.txt"), sep="|")
Mon_M_Stats <- data[data$Use_In_Analysis==TRUE, ] %>%
group_by(AreaID, ManagedAreaName, ProgramID, ProgramName, ProgramLocationID,
Month) %>%
summarize(ParameterName=parameter,
RelativeDepth=unique(RelativeDepth),
EarliestSampleDate=min(SampleDate),
LastSampleDate=max(SampleDate), N=length(ResultValue),
Min=min(ResultValue), Max=max(ResultValue),
Median=median(ResultValue), Mean=mean(ResultValue),
StandardDeviation=sd(ResultValue))
Mon_M_Stats <- as.data.table(Mon_M_Stats[order(Mon_M_Stats$ManagedAreaName,
Mon_M_Stats$ProgramID,
Mon_M_Stats$ProgramName,
Mon_M_Stats$ProgramLocationID,
Mon_M_Stats$Month), ])
fwrite(Mon_M_Stats, paste0(out_dir,"/", param_name, "_", region,
"_MonitoringLoc_Month_Stats.txt"), sep="|")
data <- data %>%
group_by(MonitoringID, AreaID, ManagedAreaName, ProgramID, ProgramName,
ProgramLocationID, SampleDate) %>%
summarise(Year=unique(Year), Month=unique(Month),
RelativeDepth=unique(RelativeDepth),
ResultValue=mean(ResultValue), Include=unique(Include),
Use_In_Analysis=unique(Use_In_Analysis))
data$SampleDate <- as.Date(data$SampleDate)
data$YearMonth <- format(data$SampleDate, format = "%m-%Y")
data$YearMonthDec <- data$Year + ((data$Month-0.5) / 12)
data$DecDate <- decimal_date(data$SampleDate)
Gets seasonal Kendall Tau statistics using the kendallSeasonalTrendTest from the EnvStats package. The Trend parameter is determined from a user-defined function based on the median, Senn slope, and p values from the data. Analysis modified from that performed at The Water Atlas: https://sarasota.wateratlas.usf.edu/water-quality-trends/#analysis-overview
The following steps are performed:
Define the trend function.
Take the data variable and only include rows that have a Use_In_Analysis value of TRUE
Group data that have the same ManagedAreaName, ProgramID, ProgramName, and ProgramLocationID.
For each group, provides the following information: Earliest Sample Date (EarliestSampleDate), Latest Sample Date (LastSampleDate), Number of Entries (N), Lowest Value (Min), Largest Value (Max), Median, Mean, Standard Deviation,
For each group, a temporary variable is created to run the kendallSeasonalTrendTest function using the Year values for year, and Month as the seasonal qualifier, and Trend.
TRUE indicates that the data should be treated as not being serially auto-correlated. An independent.obs value of FALSE indicates that it is treated as being serially auto-correlated, but also requires one observation per season per year for the full time of observation.The two stats tables are merged based on similar groups, and then Trend is determined from the user-defined function.
Write summary stats to a pipe-delimited .txt file in the output directory
After the analysis is performed, a variable is created that stores the x & y coordinates of the SKT trend line to be used for plotting
tauSeasonal <- function(dat, independent, stats.median, stats.minYear,
stats.maxYear) {
tau <- NULL
tryCatch({ken <- kendallSeasonalTrendTest(
y=dat$Mean,
season=dat$Month,
year=dat$YearFromStart,
independent.obs=independent)
tau <- ken$estimate[1]
p <- ken$p.value[2]
slope <- ken$estimate[2]
intercept <- ken$estimate[3]
chi_sq <- ken$statistic[1]
p_chi_sq <- ken$p.value[1]
trend <- trend_calculator(slope, stats.median, p)
rm(ken)
}, warning=function(w) {
print(w)
}, error=function(e) {
print(e)
}, finally={
if (!exists("tau")) {
tau <- NA
}
if (!exists("p")) {
p <- NA
}
if (!exists("slope")) {
slope <- NA
}
if (!exists("intercept")) {
intercept <- NA
}
if (!exists("trend")) {
trend <- NA
}
})
KT <-c(unique(dat$MonitoringID),
independent,
tau,
p,
slope,
intercept,
chi_sq,
p_chi_sq,
trend)
return(KT)
}
runStats <- function(dat, med, minYr, maxYr) {
#dat$Index <- as.Date(dat$SampleDate) # , "%Y-%m-%d")
dat$Mean <- as.numeric(dat$Mean)
# Calculate basic stats
stats.median <- med
stats.minYear <- minYr
stats.maxYear <- maxYr
# Calculate Kendall Tau and Slope stats,
# then update appropriate columns and table
KT <- tauSeasonal(dat, TRUE, stats.median,
stats.minYear, stats.maxYear)
if (is.null(KT[8])) {
KT <- tauSeasonal(dat, FALSE, stats.median,
stats.minYear, stats.maxYear)
}
if (is.null(KT.Stats)==TRUE) {
KT.Stats <- KT
} else{
KT.Stats <- rbind(KT.Stats, KT)
}
return(KT.Stats)
}
trend_calculator <- function(slope, median_value, p) {
trend <-
if (p < .05 & abs(slope) > abs(median_value) / 10.) {
if (slope > 0) {
2
}
else {
-2
}
}
else if (p < .05 & abs(slope) < abs(median_value) / 10.) {
if (slope > 0) {
1
}
else {
-1
}
}
else
0
return(trend)
}
KT.Stats <- NULL
# Loop that goes through each managed area.
# List of managed areas stored in MA_Years$ManagedAreaName
c_names <- c("MonitoringID", "Independent", "tau", "p",
"SennSlope", "SennIntercept", "ChiSquared", "pChiSquared", "Trend")
if(n==0){
KT.Stats <- data.frame(matrix(ncol=length(c_names),
nrow=nrow(Mon_Summ)))
colnames(KT.Stats) <- c_names
KT.Stats[, c("MonitoringID")] <- Mon_Summ[, c("MonitoringID")]
} else{
for (i in 1:n) {
x <- nrow(Mon_YM_Stats[Mon_YM_Stats$MonitoringID==Mon_IDs[i], ])
if (x>0) {
SKT.med <- Mon_Summ$Median[Mon_Summ$MonitoringID==Mon_IDs[i]]
SKT.minYr <- Mon_Summ$EarliestYear[Mon_Summ$MonitoringID==Mon_IDs[i]]
SKT.maxYr <- Mon_Summ$LatestYear[Mon_Summ$MonitoringID==Mon_IDs[i]]
KT.Stats <- runStats(Mon_YM_Stats[Mon_YM_Stats$MonitoringID==
Mon_IDs[i], ],
SKT.med, SKT.minYr, SKT.maxYr)
}
}
KT.Stats <- as.data.frame(KT.Stats)
if(dim(KT.Stats)[2]==1){
KT.Stats <- as.data.frame(t(KT.Stats))
}
colnames(KT.Stats) <- c_names
rownames(KT.Stats) <- seq(1:nrow(KT.Stats))
KT.Stats$tau <- round(as.numeric(KT.Stats$tau), digits=4)
KT.Stats$p <- round(as.numeric(KT.Stats$p), digits=4)
KT.Stats$SennSlope <- as.numeric(KT.Stats$SennSlope)
KT.Stats$SennIntercept <- as.numeric(KT.Stats$SennIntercept)
KT.Stats$ChiSquared <- round(as.numeric(KT.Stats$ChiSquared), digits=4)
KT.Stats$pChiSquared <- round(as.numeric(KT.Stats$pChiSquared), digits=4)
KT.Stats$Trend <- as.integer(KT.Stats$Trend)
}
KT.Stats <- merge.data.frame(Mon_Summ, KT.Stats,
by=c("MonitoringID"), all=TRUE)
KT.Stats <- as.data.table(KT.Stats[order(KT.Stats$MonitoringID), ])
#KT.Stats$MonitoringID <- NULL
fwrite(select(KT.Stats, -MonitoringID), paste0(out_dir,"/", param_name, "_",
region, "_KendallTau_Stats.txt"),
sep="|")
#KT.Stats$MonitoringID <- Mon_Summ$MonitoringID
data <- data[!is.na(data$ResultValue),]
KT.Plot <- KT.Stats %>%
group_by(MonitoringID) %>%
summarize(x=EarliestYear,
y=SennIntercept)
KT.Plot2 <- KT.Stats %>%
group_by(MonitoringID) %>%
summarize(x=decimal_date(LastSampleDate),
y=(x-EarliestYear)*SennSlope+SennIntercept)
KT.Plot <- bind_rows(KT.Plot, KT.Plot2)
rm(KT.Plot2)
KT.Plot <- as.data.table(KT.Plot[order(KT.Plot$MonitoringID), ])
KT.Plot <- KT.Plot[!is.na(KT.Plot$y),]
Box plots are created by using the entire data set and excludes any data that has been previously filtered out. The scripts that create plots follow this format
Use_In_Analysis of TRUEThis set of box plots are grouped by year.
plot_theme <- theme_bw() +
theme(text=element_text(family="Segoe UI"),
title=element_text(face="bold"),
plot.title=element_text(hjust=0.5, size=14, color="#314963"),
plot.subtitle=element_text(hjust=0.5, size=10, color="#314963"),
axis.title.x = element_text(margin = margin(t = 5, r = 0,
b = 10, l = 0)),
axis.title.y = element_text(margin = margin(t = 0, r = 10,
b = 0, l = 0)),
axis.text=element_text(size=10),
axis.text.x=element_text(face="bold", angle = 60, hjust = 1),
axis.text.y=element_text(face="bold"))
min_RV <- min(data$ResultValue[data$Include==TRUE])
mn_RV <- mean(data$ResultValue[data$Include==TRUE &
data$ResultValue <
quantile(data$ResultValue, 0.98)])
sd_RV <- sd(data$ResultValue[data$Include==TRUE &
data$ResultValue <
quantile(data$ResultValue, 0.98)])
y_scale <- mn_RV + 4 * sd_RV
p1 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=SampleDate, y=ResultValue, group=Year)) +
geom_boxplot(color="#333333", fill="#cccccc", outlier.shape=21,
outlier.size=3, outlier.color="#333333",
outlier.fill="#cccccc", outlier.alpha=0.75) +
labs(subtitle="Autoscale", x="Year",
y=paste0("Values (", unit, ")")) +
plot_theme
p2 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=SampleDate, y=ResultValue, group=Year)) +
geom_boxplot(color="#333333", fill="#cccccc", outlier.shape=21,
outlier.size=3, outlier.color="#333333",
outlier.fill="#cccccc", outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation", x="Year",
y=paste0("Values (", unit, ")")) +
ylim(0, y_scale) +
plot_theme
p3 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=Year, y=ResultValue, group=Year)) +
geom_boxplot(color="#333333", fill="#cccccc", outlier.shape=21,
outlier.size=3, outlier.color="#333333",
outlier.fill="#cccccc", outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation, Last 10 Years",
x="Year", y=paste0("Values (", unit, ")")) +
ylim(0, y_scale) +
scale_x_continuous(limits=c(max(data$Year) - 10.5, max(data$Year)+0.5),
breaks=seq(max(data$Year) - 10, max(data$Year), 2)) +
plot_theme
set <- ggarrange(p1, p2, p3, ncol=1)
p0 <- ggplot() + labs(title="Summary Box Plots for Entire Data",
subtitle="By Year") + plot_theme +
theme(panel.border=element_blank(), panel.grid.major=element_blank(),
panel.grid.minor=element_blank(), axis.line=element_blank())
Yset <- ggarrange(p0, set, ncol=1, heights=c(0.07, 1))
This set of box plots are grouped by year and month with the color being related to the month.
p1 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=YearMonthDec, y=ResultValue,
group=YearMonth, color=as.factor(Month))) +
geom_boxplot(fill="#cccccc", outlier.size=1.5, outlier.alpha=0.75) +
labs(subtitle="Autoscale", x="Year",
y=paste0("Values (", unit, ")"), color="Month") +
plot_theme +
theme(legend.position="top", legend.box="horizontal") +
guides(color=guide_legend(nrow=1))
p2 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=YearMonthDec, y=ResultValue,
group=YearMonth, color=as.factor(Month))) +
geom_boxplot(fill="#cccccc", outlier.size=1.5, outlier.alpha=0.75) +
labs(subtitle="Scaled to 5x Standard Deviation",
x="Year", y=paste0("Values (", unit, ")")) +
ylim(0, y_scale) +
plot_theme +
theme(legend.position="none")
p3 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=YearMonthDec, y=ResultValue,
group=YearMonth, color=as.factor(Month))) +
geom_boxplot(fill="#cccccc", outlier.size=1.5, outlier.alpha=0.75) +
labs(subtitle="Scaled to 5x Standard Deviation, Last 10 Years",
x="Year", y=paste0("Values (", unit, ")")) +
ylim(0, y_scale) +
scale_x_continuous(limits=c(max(data$Year) - 10.5, max(data$Year)+0.5),
breaks=seq(max(data$Year) - 10, max(data$Year), 2)) +
plot_theme +
theme(legend.position="none")
leg <- get_legend(p1)
set <- ggarrange(leg, p1 + theme(legend.position="none"), p2, p3, ncol=1,
heights=c(0.1, 1, 1, 1))
p0 <- ggplot() + labs(title="Summary Box Plots for Entire Data",
subtitle="By Year & Month") + plot_theme +
theme(panel.border=element_blank(), panel.grid.major=element_blank(),
panel.grid.minor=element_blank(), axis.line=element_blank())
YMset <- ggarrange(p0, set, ncol=1, heights=c(0.07, 1))
The following box plots are grouped by month with fill color being related to the month. This is designed to view potential seasonal trends.
p1 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=Month, y=ResultValue,
group=Month, fill=as.factor(Month))) +
geom_boxplot(color="#333333", outlier.shape=21, outlier.size=3,
outlier.color="#333333", outlier.alpha=0.75) +
labs(subtitle="Autoscale", x="Month",
y=paste0("Values (", unit, ")"), fill="Month") +
scale_x_continuous(limits=c(0, 13), breaks=seq(3, 12, 3)) +
plot_theme +
theme(legend.position="top", legend.box="horizontal") +
guides(fill=guide_legend(nrow=1))
p2 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=Month, y=ResultValue,
group=Month, fill=as.factor(Month))) +
geom_boxplot(color="#333333", outlier.shape=21, outlier.size=3,
outlier.color="#333333", outlier.alpha=0.75) +
labs(subtitle="Scaled to 5x Standard Deviation",
x="Month", y=paste0("Values (", unit, ")")) +
ylim(0, y_scale) +
scale_x_continuous(limits=c(0, 13), breaks=seq(3, 12, 3)) +
plot_theme +
theme(legend.position="none")
p3 <- ggplot(data=data[data$Include==TRUE &
data$Year >= max(data$Year) - 10, ],
aes(x=Month, y=ResultValue,
group=Month, fill=as.factor(Month))) +
geom_boxplot(color="#333333", outlier.shape=21, outlier.size=3,
outlier.color="#333333", outlier.alpha=0.75) +
labs(subtitle="Scaled to 5x Standard Deviation, Last 10 Years",
x="Month", y=paste0("Values (", unit, ")")) +
ylim(0, y_scale) +
scale_x_continuous(limits=c(0, 13), breaks=seq(3, 12, 3)) +
plot_theme +
theme(legend.position="none")
leg <- get_legend(p1)
set <- ggarrange(leg, p1 + theme(legend.position="none"), p2, p3, ncol=1,
heights=c(0.1, 1, 1, 1))
p0 <- ggplot() + labs(title="Summary Box Plots for Entire Data",
subtitle="By Month") + plot_theme +
theme(panel.border=element_blank(), panel.grid.major=element_blank(),
panel.grid.minor=element_blank(), axis.line=element_blank())
Mset <- ggarrange(p0, set, ncol=1, heights=c(0.07, 1))
Scatter plots of data values are created for monitoring locations that have fewer than 5 separate years of data entries.
Mon_Exclude <- Mon_Summ[Mon_Summ$SufficientData==FALSE & N_Years>0,]
Mon_Exclude <- Mon_Exclude[order(Mon_Exclude$MonitoringID),]
z=nrow(Mon_Exclude)
if(z==0){
print("There are no monitoring locations that qualify.")
} else {
for(i in 1:z){
MA_name <- unique(data$ManagedAreaName[
data$MonitoringID==Mon_Exclude$MonitoringID[i]])
Mon_name <- paste0(unique(data$ProgramID[
data$MonitoringID==Mon_Exclude$MonitoringID[i]]), "\n",
unique(data$ProgramName[
data$MonitoringID==Mon_Exclude$MonitoringID[i]]), "\n",
unique(data$ProgramLocationID[
data$MonitoringID==Mon_Exclude$MonitoringID[i]]))
p1<-ggplot(data=data[data$MonitoringID==Mon_Exclude$MonitoringID[i]&
data$Include==TRUE, ],
aes(x=SampleDate, y=ResultValue)) +
geom_point(shape=21, size=3, color="#333333", fill="#cccccc",
alpha=0.75) +
labs(title=paste0(MA_name, "\n",
Mon_name, " (", Mon_Exclude$N_Years[i],
" Unique Years)"),
subtitle="Autoscale", x="Year",
y=paste0("Values (", unit, ")")) +
plot_theme +
scale_x_date(labels=date_format("%m-%Y"))
print(p1)
}
}
The plots created in this section are designed to show the general trend of the data. Data is taken and grouped by MonitoringID. The trendlines on the plots are created using the Senn slope and intercept from the seasonal Kendall Tau analysis. The scripts that create plots follow this format
if(n==0){
print("There are no monitoring locations that qualify.")
} else {
for (i in 1:n) {
plot_data <- Mon_YM_Stats[Mon_YM_Stats$MonitoringID==Mon_IDs[i],]
KT.plot_data <- KT.Plot[KT.Plot$MonitoringID==Mon_IDs[i],]
t_min <- min(plot_data$Year)
t_max <- max(plot_data$YearMonthDec)
t_max_brk <- as.integer(round(t_max, 0))
t <- t_max-t_min
min_RV <- min(plot_data$Mean)
if(t>=30){
brk <- -10
}else if(t<30 & t>=10){
brk <- -5
}else if(t<10 & t>=4){
brk <- -2
}else if(t<4){
brk <- -1
}
MA_name <- KT.Stats$ManagedAreaName[KT.Stats$MonitoringID==Mon_IDs[i]]
Mon_name <- paste0(KT.Stats$ProgramID[KT.Stats$MonitoringID==Mon_IDs[i]],
"\n", KT.Stats$ProgramName[KT.Stats$MonitoringID==Mon_IDs[i]], "\n",
KT.Stats$ProgramLocationID[KT.Stats$MonitoringID==Mon_IDs[i]])
p1 <- ggplot(data=plot_data,
aes(x=YearMonthDec, y=Mean)) +
geom_line(size=0.75, color="#333333", alpha=0.6) +
geom_point(shape=21, size=3, color="#333333", fill="#cccccc",
alpha=0.75) +
geom_line(data=KT.plot_data, aes(x=x, y=y),
color="#000099", size=1.2, alpha=0.7) +
labs(title=paste0(MA_name, "\n", Mon_name),
subtitle=parameter,
x="Year", y=paste0("Values (", unit, ")")) +
scale_x_continuous(limits=c(t_min-0.25, t_max+0.25),
breaks=seq(t_max_brk, t_min, brk)) +
plot_theme
# p2 <- ggplot(data=plot_data,
# aes(x=DecDate, y=ResultValue)) +
# geom_point(shape=21, size=3, color="#333333", fill="#cccccc",
# alpha=0.75) +
# geom_line(data=KT.plot_data, aes(x=x, y=y),
# color="#000099", size=1.2, alpha=0.7) +
# ylim(min_RV-0.1*y_scale, y_scale) +
# labs(subtitle="Scaled to 4x Standard Deviation",
# x="Year", y=paste0("Values (", unit, ")")) +
# plot_theme
# KTset <- ggarrange(p1, p2, ncol=1, heights=c(1, 1))
#
# p0 <- ggplot() + labs()) +
# plot_theme + theme(panel.border=element_blank(),
# panel.grid.major=element_blank(),
# panel.grid.minor=element_blank(),
# axis.line=element_blank())
ResultTable <- KT.Stats[KT.Stats$MonitoringID==Mon_IDs[i], ] %>%
select(RelativeDepth, N_Data, N_Years, Median, Independent, tau, p,
SennSlope, SennIntercept, ChiSquared, pChiSquared, Trend)
t1 <- ggtexttable(ResultTable, rows=NULL,
theme=ttheme(base_size=10)) %>%
tab_add_footnote(text="p < 0.00005 appear as 0 due to rounding.\n
SennIntercept is intercept value at beginning of
record for monitoring location",
size=10, face="italic")
print(ggarrange(p1, t1, ncol=1, heights=c(0.85, 0.15)))
cat('\n \n \n')
rm(plot_data)
rm(KTset, leg)
rm(plot_data)
rm(KTset, leg)
}
}
Data is taken and grouped by MonitoringID. The scripts that create plots follow this format
Use_In_Analysis of TRUE for the desired monitoring locationThe following plots are arranged by MonitoringID with data grouped by Year, then Year and Month, then finally Month only. Each program area will have 3 sets of plots, each with 3 panels in them. Each panel goes as follows:
if(n==0){
print("There are no monitoring locations that qualify.")
} else {
for (i in 1:n) {
year_lower <- min(data$Year[data$Use_In_Analysis==TRUE &
data$MonitoringID==Mon_IDs[i]])
year_upper <- max(data$Year[data$Use_In_Analysis==TRUE &
data$MonitoringID==Mon_IDs[i]])
min_RV <- min(data$ResultValue[data$Use_In_Analysis==TRUE &
data$MonitoringID==Mon_IDs[i]])
mn_RV <- mean(data$ResultValue[data$Use_In_Analysis==TRUE &
data$MonitoringID==Mon_IDs[i] &
data$ResultValue <
quantile(data$ResultValue, 0.98)])
sd_RV <- sd(data$ResultValue[data$Use_In_Analysis==TRUE &
data$MonitoringID==Mon_IDs[i] &
data$ResultValue <
quantile(data$ResultValue, 0.98)])
x_scale <- ifelse(year_upper - year_lower > 30, 10, 5)
y_scale <- mn_RV + 4 * sd_RV
MA_name <- KT.Stats$ManagedAreaName[KT.Stats$MonitoringID==Mon_IDs[i]]
Mon_name <- paste0(KT.Stats$ProgramID[KT.Stats$MonitoringID==Mon_IDs[i]],
"\n", KT.Stats$ProgramName[KT.Stats$MonitoringID==Mon_IDs[i]], "\n",
KT.Stats$ProgramLocationID[KT.Stats$MonitoringID==Mon_IDs[i]])
##Year plots
p1 <- ggplot(data=data[data$Use_In_Analysis==TRUE &
data$MonitoringID==Mon_IDs[i], ],
aes(x=Year, y=ResultValue, group=Year)) +
geom_boxplot(color="#333333", fill="#cccccc", outlier.shape=21,
outlier.size=3, outlier.color="#333333",
outlier.fill="#cccccc", outlier.alpha=0.75) +
labs(subtitle="Autoscale",
x="Year", y=paste0("Values (", unit, ")")) +
scale_x_continuous(limits=c(year_lower - 1, year_upper + 1),
breaks=rev(seq(year_upper,
year_lower, -x_scale))) +
plot_theme
p2 <- ggplot(data=data[data$Use_In_Analysis==TRUE &
data$MonitoringID==Mon_IDs[i], ],
aes(x=Year, y=ResultValue, group=Year)) +
geom_boxplot(color="#333333", fill="#cccccc", outlier.shape=21,
outlier.size=3, outlier.color="#333333",
outlier.fill="#cccccc", outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation",
x="Year", y=paste0("Values (", unit, ")")) +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(year_lower - 1, year_upper + 1),
breaks=rev(seq(year_upper,
year_lower, -x_scale))) +
plot_theme
p3 <- ggplot(data=data[data$Use_In_Analysis==TRUE &
data$MonitoringID==Mon_IDs[i] &
data$Year>=year_upper-10, ],
aes(x=Year, y=ResultValue, group=Year)) +
geom_boxplot(color="#333333", fill="#cccccc", outlier.shape=21,
outlier.size=3, outlier.color="#333333",
outlier.fill="#cccccc", outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation, Last 10 Years",
x="Year", y=paste0("Values (", unit, ")")) +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(year_upper - 10.5, year_upper + 1),
breaks=rev(seq(year_upper, year_upper - 10,-2))) +
plot_theme
Yset <- ggarrange(p1, p2, p3, ncol=1)
p0 <- ggplot() + labs(title=paste0(MA_name, "\n", Mon_name),
subtitle="By Year") +
plot_theme + theme(panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
axis.line=element_blank())
## Year & Month Plots
p4 <- ggplot(data=data[data$Use_In_Analysis==TRUE &
data$MonitoringID==Mon_IDs[i], ],
aes(x=YearMonthDec, y=ResultValue,
group=YearMonth, color=as.factor(Month))) +
geom_boxplot(fill="#cccccc", outlier.size=1.5, outlier.alpha=0.75) +
labs(subtitle="Autoscale",
x="Year", y=paste0("Values (", unit, ")"), color="Month") +
scale_x_continuous(limits=c(year_lower - 1, year_upper + 1),
breaks=rev(seq(year_upper,
year_lower, -x_scale))) +
plot_theme +
theme(legend.position="none")
p5 <- ggplot(data=data[data$Use_In_Analysis==TRUE &
data$MonitoringID==Mon_IDs[i], ],
aes(x=YearMonthDec, y=ResultValue,
group=YearMonth, color=as.factor(Month))) +
geom_boxplot(fill="#cccccc", outlier.size=1.5, outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation",
x="Year", y=paste0("Values (", unit, ")"), color="Month") +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(year_lower - 1, year_upper + 1),
breaks=rev(seq(year_upper,
year_lower, -x_scale))) +
plot_theme +
theme(legend.position="top", legend.box="horizontal") +
guides(color=guide_legend(nrow=1))
p6 <- ggplot(data=data[data$Use_In_Analysis==TRUE &
data$MonitoringID==Mon_IDs[i], ],
aes(x=YearMonthDec, y=ResultValue,
group=YearMonth, color=as.factor(Month))) +
geom_boxplot(fill="#cccccc", outlier.size=1.5, outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation, Last 10 Years",
x="Year", y=paste0("Values (", unit, ")"), color="Month") +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(year_upper - 10.5, year_upper + 1),
breaks=rev(seq(year_upper, year_upper - 10,-2))) +
plot_theme +
theme(legend.position="none")
leg1 <- get_legend(p5)
YMset <- ggarrange(leg1, p4, p5 + theme(legend.position="none"), p6,
ncol=1, heights=c(0.1, 1, 1, 1))
p00 <- ggplot() + labs(title=paste0(MA_name, "\n", Mon_name),
subtitle="By Year & Month") + plot_theme +
theme(panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(), axis.line=element_blank())
## Month Plots
p7 <- ggplot(data=data[data$Use_In_Analysis==TRUE &
data$MonitoringID==Mon_IDs[i], ],
aes(x=Month, y=ResultValue,
group=Month, fill=as.factor(Month))) +
geom_boxplot(color="#333333", outlier.shape=21, outlier.size=3,
outlier.color="#333333", outlier.alpha=0.75) +
labs(subtitle="Autoscale",
x="Month", y=paste0("Values (", unit, ")"), fill="Month") +
scale_x_continuous(limits=c(0, 13), breaks=seq(3, 12, 3)) +
plot_theme +
theme(legend.position="none")
p8 <- ggplot(data=data[data$Use_In_Analysis==TRUE &
data$MonitoringID==Mon_IDs[i], ],
aes(x=Month, y=ResultValue,
group=Month, fill=as.factor(Month))) +
geom_boxplot(color="#333333", outlier.shape=21, outlier.size=3,
outlier.color="#333333", outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation",
x="Month", y=paste0("Values (", unit, ")"), fill="Month") +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(0, 13), breaks=seq(3, 12, 3)) +
plot_theme +
theme(legend.position="top", legend.box="horizontal") +
guides(fill=guide_legend(nrow=1))
p9 <- ggplot(data=data[data$Use_In_Analysis==TRUE &
data$MonitoringID==Mon_IDs[i] &
data$Year >= year_upper - 10, ],
aes(x=Month, y=ResultValue,
group=Month, fill=as.factor(Month))) +
geom_boxplot(color="#333333", outlier.shape=21, outlier.size=3,
outlier.color="#333333", outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation, Last 10 Years",
x="Month", y=paste0("Values (", unit, ")"), fill="Month") +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(0, 13), breaks=seq(3, 12, 3)) +
plot_theme +
theme(legend.position="none")
leg2 <- get_legend(p8)
Mset <- ggarrange(leg2, p7, p8 + theme(legend.position="none"), p9,
ncol=1, heights=c(0.1, 1, 1, 1))
p000 <- ggplot() + labs(title=paste0(MA_name, "\n", Mon_name),
subtitle="By Month") + plot_theme +
theme(panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(), axis.line=element_blank())
print(ggarrange(p0, Yset, ncol=1, heights=c(0.1, 1)))
print(ggarrange(p00, YMset, ncol=1, heights=c(0.1, 1)))
print(ggarrange(p000, Mset, ncol=1, heights=c(0.1, 1)))
rm(plot_data)
rm(p1, p2, p3, p4, p5, p6, p7, p8, p9, p0, p00, p000, leg1, leg2,
Yset, YMset, Mset)
}
}